#Gina Miller, Ryan Welch, and Andrew Vonasch
#Replication Models for International Interactions
##################################################

#check if packages are installed, and if not, install them
packages <- c("foreign", "Zelig", "MatchIt", "data.table", "Hmisc", "rms", "dplyr", "plm", "urca", "car", "gplots",
              "tseries", "glmnet", "readstata13")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(packages, rownames(installed.packages())), repos="http://cran.rstudio.com/")  
}

#attach packages
lapply(packages, library, character.only = T)

####################################################

#turn off scientific notation
options(scipen=999)

#set seed, using random.org(1-10000)
set.seed(868)

#open data, set it as igo.data
igo.data <-read.dta("MilVonWel_Data_23Sep2016.dta", convert.factor=FALSE)

#keep the variables we want for analysis
igo.data1 <-  igo.data[, c("cowcode", "year", "hr_latentmean", "hasnhri", "lostmem", "lpop", "hr_latentmeanlag", "hrfilled", "media_ai_tot", "xconst", "lgdp", "igomemsum")]

#listwise delete
igo.data1 <- na.omit(igo.data1) 

#attach data
attach(igo.data1)

#########################################################
#Matching
#Nearest Neighbor (one to one)
nearest.out1<-matchit(lostmem ~ hrfilled + hasnhri + media_ai_tot + hasnhri + xconst + lgdp + igomemsum + lpop, data = igo.data1, method ="nearest", distance = "logit", discard = "both", reestimate = TRUE)

##Table 1##
nearest.out1
summary(nearest.out1)

#create data frame
nearest.data1<-match.data(nearest.out1)

#Regression
reg<-zelig(hr_latentmean ~ lostmem + hr_latentmeanlag + hrfilled + media_ai_tot + xconst + lgdp + lpop + igomemsum + hasnhri, data = nearest.data1, model= "normal")

##Table 2##
summary(reg)
##############################################

#Time-Series

#Keep variables we want
igo.data2 <-  igo.data[, c("cowcode", "year", "hr_latentmean", "hasnhri", "lostmem", "lostmem_lag", "igomemsumlag1", "igomemsumlag2", "igomemsumlag3", "lpop", "hr_latentmeanlag", "hrfilled", "media_ai_tot", "xconst", "lgdp", "igomemsum_lag", "igomemsum")]

#Create lags for general model
igo.data2$lostmem_lag2 <- NA
igo.data2$lostmem_lag2[igo.data$igomemsumlag3 < igo.data$igomemsumlag2] <- 1
igo.data2$lostmem_lag2[igo.data$igomemsumlag3 >= igo.data$igomemsumlag2] <- 0
igo.data2$lostmem_lag2


igo.data2$lostmem_lag3 <- NA
igo.data2$lostmem_lag3[igo.data$igomemsumlag4 < igo.data$igomemsumlag3] <- 1
igo.data2$lostmem_lag3[igo.data$igomemsumlag4 >= igo.data$igomemsumlag3] <- 0
igo.data2$lostmem_lag3

igo.data2$lostmem_lag4 <- NA
igo.data2$lostmem_lag4[igo.data$igomemsumlag5 < igo.data$igomemsumlag4] <- 1
igo.data2$lostmem_lag4[igo.data$igomemsumlag5 >= igo.data$igomemsumlag4] <- 0
igo.data2$lostmem_lag4

igo.data2 <- setDT(igo.data2)
igo.data2[, hr_latentmean_lag1:=c(NA, hr_latentmean[-.N], by=cowcode)]
igo.data2[, hr_latentmean_lag2:=c(NA, hr_latentmean_lag1[-.N], by=cowcode)]
igo.data2[, hr_latentmean_lag3:=c(NA, hr_latentmean_lag2[-.N], by=cowcode)]
igo.data2[, hr_latentmean_lag4:=c(NA, hr_latentmean_lag3[-.N], by=cowcode)]

#General to specific ADL specification setting
#Start with ADL(4,4)
adl44 <-plm(hr_latentmean ~ hr_latentmean_lag1 + hr_latentmean_lag2 + hr_latentmean_lag3 + hr_latentmean_lag4 + lostmem + lostmem_lag + lostmem_lag2 + lostmem_lag3 + lostmem_lag4 + hrfilled + media_ai_tot + xconst + lgdp + lpop + hasnhri, data = igo.data2, model = "pooling")
summary(adl44)

corr_test44 <- pdwtest(adl44)
corr_test44

adl30 <-plm(hr_latentmean ~ hr_latentmean_lag1 + hr_latentmean_lag2 + hr_latentmean_lag3 + lostmem + hrfilled + media_ai_tot + xconst + lgdp + lpop + hasnhri, data = igo.data2, model = "pooling")
corr_test30 <- pdwtest(adl30)
corr_test30

#Estimate ADL(3,0)
adl30 <-lm(hr_latentmean ~ hr_latentmean_lag1 + hr_latentmean_lag2 + hr_latentmean_lag3 + lostmem + hrfilled + media_ai_tot + xconst + lgdp + lpop + hasnhri, data = igo.data2)

##Table 3##
summary(adl30)
##############

#Long-run multiplier
lrm.adl30 <- function(b0, a1, a2, a3){
  (b0)/(1 - (a1 + a2 + a3))
}

lrm30 <- lrm.adl30(b0 = -0.01608751, a1 = 1.45623202, a2 = -0.52945704, a3 = 0.04399330)
lrm30


#####################################################################################

#################
###Appendix######
#################


#Post-Matching Regression with Polity instead of XCONST

#Use polity instead of executive contraints

igo.data5 <-  igo.data[, c("cowcode", "year", "hr_latentmean", "hasnhri", "lostmem", "lpop", "hr_latentmeanlag", "hrfilled", "media_ai_tot", "polity2", "lgdp", "igomemsum")]

#listwise delete
igo.data5 <- na.omit(igo.data5) #from n = 6513 to 2666, most of it temporal (1979 - 2000)

#attach data
attach(igo.data5)

#########################################################
#Matching
#Nearest Neighbor (one to one)
nearest.out5<-matchit(lostmem ~ hrfilled + hasnhri + media_ai_tot + hasnhri + polity2 + lgdp + igomemsum + lpop, data = igo.data5, method ="nearest", distance = "logit", discard = "both", reestimate = TRUE)
nearest.out5

summary(nearest.out5)

#create data frame
nearest.data5<-match.data(nearest.out5)

#Regression
reg5 <-zelig(hr_latentmean ~ lostmem + hr_latentmeanlag + hrfilled + media_ai_tot + polity2 + lgdp + lpop + igomemsum + hasnhri, data = nearest.data5, model= "normal")

##Table 1###
summary(reg5)
############

#############################################################################

#keep the variables we want for analysis
igo.data4 <-  igo.data[, c("cowcode", "year", "hr_latentmean", "hasnhri", "lostmem", "lpop", "hr_latentmeanlag", "hrfilled", "media_ai_tot", "xconst", "lgdp", "igomemsum", "cwar", "iwar")]

#merge in INGO data
ingos <- read.dta("ingos_std.dta", convert.factors = F)
igo.data4 <- merge(igo.data4, ingos, by = c ("cowcode", "year"))
head(igo.data4)
describe(igo.data4) 

#listwise delete
igo.data4 <- na.omit(igo.data4) 

#attach data
attach(igo.data4)

#Matching
#Nearest Neighbor (one to one)
nearest.out4<-matchit(lostmem ~ hrfilled + hasnhri + media_ai_tot + hasnhri + xconst + lgdp + igomemsum + lpop + ingo_count + cwar + iwar, data = igo.data4, method ="nearest", distance = "logit", discard = "both", reestimate = TRUE)
nearest.out4

summary(nearest.out4)

#create data frame
nearest.data4<-match.data(nearest.out4)

#Regression
reg4<-zelig(hr_latentmean ~ lostmem + hr_latentmeanlag + hrfilled + media_ai_tot + xconst + lgdp + lpop + igomemsum + hasnhri + ingo_count + cwar + iwar, data = nearest.data4, model= "normal")

##Table 2##
summary(reg4)
#############


#Regression without Matching

igo.data1 <-  igo.data[, c("cowcode", "year", "hr_latentmean", "hasnhri", "lostmem", "lpop", "hr_latentmeanlag", "hrfilled", "media_ai_tot", "xconst", "lgdp", "igomemsum")]
reg0 <-zelig(hr_latentmean ~ lostmem + hr_latentmeanlag + hrfilled + media_ai_tot + xconst + lgdp + lpop + igomemsum + hasnhri, data = igo.data1, model= "normal")

##Table 3##
summary(reg0) 
############

#########################################################



#First Differences Regression

#Create the change variables
igo.data2$hr_diff <- igo.data2$hr_latentmean - igo.data2$hr_latentmeanlag
igo.data2$lostmem_diff <- igo.data2$lostmem - igo.data2$lostmem_lag


fd.mod1 <- lm(hr_diff ~ lostmem_diff + hrfilled + media_ai_tot + xconst + igomemsum + lgdp + lpop + hasnhri, data = igo.data2)

##Table 4##
summary(fd.mod1)
############


#######################################################



###################################################

